import re

import bs4
import requests

# Matches every "//" that is not directly preceded by the text "https:".
# Kept unchanged as a module-level name for backward compatibility, but
# fix_url() no longer applies it: substituting it over the whole string also
# deletes "//" inside paths and query strings and corrupts "http://" links.
pattern = re.compile(r'(?<!https:)\/\/')


def fix_url(url):
    """Return *url* rewritten so that it starts with ``https://``.

    Only the beginning of the string is touched; everything after the
    scheme (path, query string, embedded URLs) is returned untouched.

    * ``http://host/...`` or ``https://host/...`` (scheme in any letter
      case) -> ``https://host/...``
    * ``//host/...`` (protocol-relative link) -> ``https://host/...``
    * anything else -> ``'https://' + url``

    Root-relative paths such as ``/a/b`` cannot be resolved here because no
    base URL is available; they are simply prefixed like any other value.

    :param url: the raw ``href`` value as a string.
    :return: the normalised URL string.
    """
    scheme, separator, remainder = url.partition('://')
    # Compare the scheme case-insensitively so "HTTP://" / "HTTPS://" are
    # recognised instead of being treated as part of the host name.
    if separator and scheme.lower() in ('http', 'https'):
        return 'https://' + remainder
    # Protocol-relative link: drop just the leading "//" marker.
    if url.startswith('//'):
        url = url[2:]
    return 'https://' + url


# Download the Sohu front page and, for every anchor matched by the CSS
# selector below, print its tag type, its title and its normalised link.
try:
    # Without a timeout requests waits indefinitely on a stalled connection.
    resp = requests.get('https://sohu.com/index.html', timeout=10)
except requests.RequestException:
    # Connection failures and timeouts are reported the same way as a
    # non-200 response instead of ending the script with a traceback.
    resp = None

if resp is not None and resp.status_code == 200:
    soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    anchors = soup.select('div.list16 > ul > li > a')
    for anchor in anchors:
        # The tag's attrs mapping holds its HTML attributes; use .get() so
        # an anchor lacking an attribute does not abort the loop with a
        # KeyError.
        href = anchor.attrs.get('href')
        if not href:
            # Nothing to link to; skip this anchor entirely.
            continue
        print(type(anchor))
        # Fall back to the visible link text when there is no title.
        print(anchor.attrs.get('title', anchor.get_text(strip=True)))
        print(fix_url(href))
    print('程序结束!!!')
else:
    print('无法获取页面')
